Data Visualization Project 02

This is the revised version of mini-project 02. The marathon results dataset is imported from the project's data folder (rather than with `read_csv`), since the file is included there.

I was unsure how to import the shapefile through the project, so it is imported the same way as in mini-project 2.

# install.packages("sf")  # one-time setup; run manually if sf is missing
library(sf)
## Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE

# Country polygons, read from the shapefile in the project's data folder.
world_shapes <- read_sf(
  "C:/Users/hfort/OneDrive/Desktop/HF_dataviz_finproj/dataviz_final_project/data/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp"
)

# NOTE(review): setwd() with an absolute path ties this script to one machine
# and changes where every later relative read/write resolves. It is kept
# because the CSV read below depends on it; consider project-relative paths.
setwd("C:/Users/hfort/OneDrive/Desktop/HF_dataviz_finproj/dataviz_final_project/data")

# 2017 marathon results; the relative path resolves against the folder set
# by setwd() above.
marathon_results_2017 <- read.csv(file = "marathon_results_2017.csv")
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Number of 2017 marathon entries per country, one row per country code.
# Summarising (instead of adding a per-runner `count` column) keeps the join
# below at one row per country, so each polygon appears once in the map data
# rather than once for every runner from that country.
country_count <- marathon_results_2017 %>%
  group_by(Country) %>%
  summarise(count = n(), .groups = "drop") %>%
  # Rename the key so it matches the shapefile column used in the join.
  rename(ISO_A3 = Country)

# Attach the counts to the country polygons. Countries with no matching code
# in the results keep their geometry and get count = NA.
# NOTE(review): assumes `Country` holds the same three-letter codes as the
# shapefile's ISO_A3 column -- confirm, otherwise those countries stay NA.
world_marathon <- world_shapes %>%
  left_join(country_count, by = "ISO_A3")

# Choropleth of entries per country.
ggplot() +
  geom_sf(data = world_marathon, aes(fill = count)) +
  scale_fill_gradientn(colors = c("blue", "green", "yellow", "red")) +
  theme_light()

# Treat the M.F column as categorical so it maps to discrete colours/fills.
marathon_results_2017[["M.F"]] <- as.factor(marathon_results_2017[["M.F"]])
class(marathon_results_2017$M.F)
## [1] "factor"

# Runners whose overall place is 100 or better.
top_100 <- filter(marathon_results_2017, Overall <= 100)

# Scatter of overall place against the Gender column, coloured by M.F.
interactive_plot <- ggplot(
  data = top_100,
  mapping = aes(x = Overall, y = Gender, colour = M.F)
) +
  geom_point() +
  theme_light()

# plotly is already attached near the top of the file; the call is kept so
# this chunk also runs on its own.
library(plotly)
# Convert the static ggplot into an interactive plotly widget.
ggplotly(interactive_plot)
# Distribution of overall place for each M.F level, as an interactive boxplot.
gender_box <- ggplot(
  data = marathon_results_2017,
  mapping = aes(x = M.F, y = Overall, fill = M.F)
) +
  geom_boxplot() +
  theme_light()

# plotly is already attached near the top of the file; the call is kept so
# this chunk also runs on its own.
library(plotly)
ggplotly(gender_box)
# Violin plot of overall place for each M.F level.
gender_violin <- ggplot(
  marathon_results_2017,
  aes(x = M.F, y = Overall, fill = M.F)
) +
  geom_violin(width = 0.5) +
  # The y axis shows the Overall column (finishing place), not a weight.
  labs(x = "", y = "Overall place") +
  # "none" replaces the deprecated FALSE (ggplot2 >= 3.3.4).
  # NOTE(review): only `fill` is mapped, so this leaves the fill legend
  # visible; use guides(fill = "none") if the intent was to hide it.
  guides(color = "none") +
  theme_minimal()
gender_violin

# Save the plot to the current working directory. Arguments are spelled out
# in full instead of relying on `file` partially matching `filename`.
ggsave(filename = "gender_violin.png", plot = gender_violin, scale = 2)
## Saving 14 x 10 in image
# Linear model of overall place on age, fitted to every runner.
regression_model <- lm(formula = Overall ~ Age, data = marathon_results_2017)

# Scatter of age against overall place with the fitted linear trend overlaid.
ggplot(data = marathon_results_2017, mapping = aes(x = Age, y = Overall)) +
  geom_point() +
  geom_smooth(formula = "y ~ x", method = "lm") +
  theme_light()

# Runners whose overall place is 100 or better.
top_100 <- filter(marathon_results_2017, Overall <= 100)

# Same linear model as before, refitted on the top-100 subset only.
regression_model <- lm(formula = Overall ~ Age, data = top_100)

# Age against overall place for the top 100, with a linear trend and a
# text note placed at (37, 55).
ggplot(data = top_100, mapping = aes(x = Age, y = Overall)) +
  geom_point() +
  geom_smooth(formula = "y ~ x", method = "lm") +
  annotate(
    "text",
    x = 37,
    y = 55,
    label = "Age clearly doesn't define fitness level!"
  ) +
  theme_light()